Analysis on Reinfection of Sexually Transmitted Diseases

Yichu Chen, Gu Gong, Kate Jones, Gianni Spiga

2022-11-29

Libraries Used

library(survival)
library(KMsurv)
library(ggplot2)
library(ggpubr)
library(survminer)
library(plotly)
library(muhaz)
library(ggthemes)
library(plyr)
library(tidyr)
data(std)
std
## [1] FALSE

Exploratory Data Analysis

Checking Proportionality

Kaplan-Meier Curves

## Call:
## survdiff(formula = surv_object ~ iinfct, data = std)
## 
##                    N Observed Expected (O-E)^2/E (O-E)^2/V
## iinfct=gonorrhea 140       73     54.5   6.28042   7.50617
## iinfct=chlamydia 396      135    153.0   2.12201   3.81136
## iinfct=both      341      139    139.5   0.00166   0.00278
## 
##  Chisq= 8.5  on 2 degrees of freedom, p= 0.01

Hazard Ratios

Cumulative Hazards and CLogLog

Schoenfield Residuals for Initial Infection

## Call:
## coxph(formula = surv_object ~ iinfct, data = std)
## 
##   n= 877, number of events= 347 
## 
##                    coef exp(coef) se(coef)      z Pr(>|z|)   
## iinfctchlamydia -0.4202    0.6569   0.1457 -2.884  0.00393 **
## iinfctboth      -0.2980    0.7423   0.1450 -2.055  0.03984 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia    0.6569      1.522    0.4937    0.8741
## iinfctboth         0.7423      1.347    0.5587    0.9863
## 
## Concordance= 0.524  (se = 0.016 )
## Likelihood ratio test= 7.93  on 2 df,   p=0.02
## Wald test            = 8.37  on 2 df,   p=0.02
## Score (logrank) test = 8.46  on 2 df,   p=0.01

Finding the Best Model

The Full model

cox1 <-
  coxph(
    surv_object ~ iinfct + marital + race + os12m + os30d +
      rs12m + rs30d + abdpain + discharge + dysuria + condom + 
      itch + lesion + rash + lymph + vagina + dchexam + abnode +
      age + yschool + npartner,
    data = std
  )
summary(cox1)
## Call:
## coxph(formula = surv_object ~ iinfct + marital + race + os12m + 
##     os30d + rs12m + rs30d + abdpain + discharge + dysuria + condom + 
##     itch + lesion + rash + lymph + vagina + dchexam + abnode + 
##     age + yschool + npartner, data = std)
## 
##   n= 877, number of events= 347 
## 
##                      coef exp(coef)  se(coef)      z Pr(>|z|)   
## iinfctchlamydia -0.331853  0.717593  0.149264 -2.223  0.02620 * 
## iinfctboth      -0.263731  0.768180  0.149262 -1.767  0.07724 . 
## maritalMarried   0.054707  1.056231  0.431282  0.127  0.89906   
## maritalSingle    0.408462  1.504502  0.295237  1.384  0.16651   
## raceWhite       -0.112104  0.893951  0.141248 -0.794  0.42739   
## os12m1          -0.205985  0.813845  0.212025 -0.972  0.33129   
## os30d1          -0.337675  0.713427  0.238454 -1.416  0.15675   
## rs12m1           0.036907  1.037596  0.444944  0.083  0.93389   
## rs30d1          -0.194243  0.823458  0.565166 -0.344  0.73108   
## abdpain1         0.233865  1.263474  0.155288  1.506  0.13207   
## discharge1       0.113143  1.119792  0.114148  0.991  0.32159   
## dysuria1         0.162934  1.176960  0.155112  1.050  0.29352   
## condomnever     -0.263793  0.768133  0.119652 -2.205  0.02748 * 
## itch1           -0.149871  0.860819  0.154492 -0.970  0.33200   
## lesion1         -0.188159  0.828483  0.333106 -0.565  0.57217   
## rash1            0.003932  1.003940  0.392693  0.010  0.99201   
## lymph1          -0.030839  0.969632  0.547483 -0.056  0.95508   
## vagina1          0.349257  1.418013  0.174697  1.999  0.04559 * 
## dchexam1        -0.465508  0.627816  0.229914 -2.025  0.04290 * 
## abnode1          0.193753  1.213797  0.425224  0.456  0.64864   
## age              0.007855  1.007886  0.013891  0.565  0.57176   
## yschool         -0.127454  0.880334  0.039309 -3.242  0.00119 **
## npartner         0.076185  1.079162  0.054067  1.409  0.15881   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia    0.7176     1.3935    0.5356    0.9615
## iinfctboth         0.7682     1.3018    0.5733    1.0292
## maritalMarried     1.0562     0.9468    0.4536    2.4596
## maritalSingle      1.5045     0.6647    0.8435    2.6835
## raceWhite          0.8940     1.1186    0.6778    1.1791
## os12m1             0.8138     1.2287    0.5371    1.2332
## os30d1             0.7134     1.4017    0.4471    1.1385
## rs12m1             1.0376     0.9638    0.4338    2.4818
## rs30d1             0.8235     1.2144    0.2720    2.4929
## abdpain1           1.2635     0.7915    0.9319    1.7130
## discharge1         1.1198     0.8930    0.8953    1.4006
## dysuria1           1.1770     0.8496    0.8684    1.5951
## condomnever        0.7681     1.3019    0.6076    0.9711
## itch1              0.8608     1.1617    0.6359    1.1652
## lesion1            0.8285     1.2070    0.4313    1.5916
## rash1              1.0039     0.9961    0.4650    2.1675
## lymph1             0.9696     1.0313    0.3316    2.8355
## vagina1            1.4180     0.7052    1.0069    1.9970
## dchexam1           0.6278     1.5928    0.4001    0.9852
## abnode1            1.2138     0.8239    0.5275    2.7932
## age                1.0079     0.9922    0.9808    1.0357
## yschool            0.8803     1.1359    0.8151    0.9508
## npartner           1.0792     0.9266    0.9707    1.1998
## 
## Concordance= 0.634  (se = 0.016 )
## Likelihood ratio test= 73.29  on 23 df,   p=4e-07
## Wald test            = 69.84  on 23 df,   p=1e-06
## Score (logrank) test = 71.68  on 23 df,   p=7e-07

Simplifying the Model

Residuals

Martingale

Transformation of Quantitative to a Categorical Variable

## Call:
## coxph(formula = surv_object ~ iinfct + condom + vagina + dchexam + 
##     yschool, data = std)
## 
##   n= 877, number of events= 347 
## 
##                     coef exp(coef) se(coef)      z Pr(>|z|)    
## iinfctchlamydia -0.38468   0.68067  0.14618 -2.632  0.00850 ** 
## iinfctboth      -0.24681   0.78129  0.14561 -1.695  0.09006 .  
## condomnever     -0.29779   0.74245  0.11551 -2.578  0.00994 ** 
## vagina1          0.40660   1.50171  0.16739  2.429  0.01514 *  
## dchexam1        -0.37042   0.69044  0.22123 -1.674  0.09405 .  
## yschool         -0.14357   0.86626  0.03332 -4.308 1.64e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia    0.6807     1.4692    0.5111    0.9065
## iinfctboth         0.7813     1.2799    0.5873    1.0393
## condomnever        0.7425     1.3469    0.5920    0.9311
## vagina1            1.5017     0.6659    1.0817    2.0848
## dchexam1           0.6904     1.4483    0.4475    1.0652
## yschool            0.8663     1.1544    0.8115    0.9247
## 
## Concordance= 0.603  (se = 0.017 )
## Likelihood ratio test= 45.32  on 6 df,   p=4e-08
## Wald test            = 46.51  on 6 df,   p=2e-08
## Score (logrank) test = 46.77  on 6 df,   p=2e-08

dfBeta

Outliers

In Conclusion

Our Best Model

## Call:
## coxph(formula = surv_object ~ iinfct + condom + vagina + dchexam + 
##     yschool, data = std)
## 
##   n= 877, number of events= 347 
## 
##                     coef exp(coef) se(coef)      z Pr(>|z|)    
## iinfctchlamydia -0.38468   0.68067  0.14618 -2.632  0.00850 ** 
## iinfctboth      -0.24681   0.78129  0.14561 -1.695  0.09006 .  
## condomnever     -0.29779   0.74245  0.11551 -2.578  0.00994 ** 
## vagina1          0.40660   1.50171  0.16739  2.429  0.01514 *  
## dchexam1        -0.37042   0.69044  0.22123 -1.674  0.09405 .  
## yschool         -0.14357   0.86626  0.03332 -4.308 1.64e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## iinfctchlamydia    0.6807     1.4692    0.5111    0.9065
## iinfctboth         0.7813     1.2799    0.5873    1.0393
## condomnever        0.7425     1.3469    0.5920    0.9311
## vagina1            1.5017     0.6659    1.0817    2.0848
## dchexam1           0.6904     1.4483    0.4475    1.0652
## yschool            0.8663     1.1544    0.8115    0.9247
## 
## Concordance= 0.603  (se = 0.017 )
## Likelihood ratio test= 45.32  on 6 df,   p=4e-08
## Wald test            = 46.51  on 6 df,   p=2e-08
## Score (logrank) test = 46.77  on 6 df,   p=2e-08